Exploring the MDRP Database
  • Home
  • ETL
  • Question I
  • Question II

Question I: Correlation

Author

Chionesu George

Published

July 13, 2023

Modified

November 22, 2023

  • Market Age
  • Days Absent from Market

“How does the correlation between days_to_market and on_market_age change by route of administration?”:

if (!"drug_taxonomy" %in% ls()){
  stop("This section needs to be updated")
  cache_prep(drug_taxonomy) |> cache_save(.cache);
}

if (!"drug_obs_smart_data" %in% ls()){
  stop("This section needs to be updated")
  drug_obs_smart_data <- smart.data$
    new(x = drug_obs_data, name = "drugs")$
    taxonomy.rule(!!!drug_taxonomy)$
    enforce.rules(for_usage)$
    cache_mgr(action = upd) |>
    invisible();
  
  cache_prep(drug_obs_smart_data) |> cache_save(.cache);
}

get.smart("drugs")$use(identifier, metrics, retain = c(route), subset = days_to_market >= 0) |> 
  # View() 
  setkey(route, alt_ndc, days_to_market, on_market_age) |>
  split_f(~route) |>
  map_dbl(\(x) x %$% cor(as.numeric(days_to_market), as.numeric(on_market_age))) |>
  modify_if(is.na, \(x) 0) |> 
  (\(x){ 
    x <- x[order(x)]
    nm <- names(x)
    z <- calc.zero_mean(x, as.zscore = TRUE, use.population = TRUE)
    y <- ratio(x + abs(min(x)), type="pareto", decimals = 6)
    
    .wh_scale <- 800 * c(1.2, .7)
    
    plot_ly(
      x = z
      , y = y
      , size = 5 * exp(x) + 10
      , width = .wh_scale[1]
      , height = .wh_scale[2]
      , hoverinfo = "text"
      , hovertext = sprintf(fmt ="<b>%s</b><br><b>Y:</b> %.2f%% of Total<br><b>Cor</b>(days_to_market, on_market_age): %.2f<br><b>Z-score</b>(X): %.2f", nm, y * 100, x, z)
      , color = x
      , stroke = I("black")
      , type = "scatter", mode = "markers"
      ) |>
      config(mathjax = "cdn", displayModeBar = FALSE) |>
      layout(
        xaxis = list(
            title = list(
              text = "Z-score<sub>X</sub>: X | Cor(m<sub>0</sub>, m<sub>1</sub>) ~ Route"
              , font = list(size = 16, family = "Georgia"))
            , gridcolor = "#FFFFFF"
            )
        , yaxis = list(
            title = list(
              text = "Cumulative Proportion (X)"
              , font = list(size = 16, family = "Georgia"))
            , gridcolor = "#FFFFFF"
            )
        , title = list(
            text = sprintf("Correlation Coefficient (<span style='text-emphasis-position:under; text-emphais: filled red double-circle; '>%s</span> vs. <span style='text-emphasis-position:under; text-emphais: filled red double-circle; '>%s</span>) by Route of Administration", "days_to_market", "on_market_age")
            , font = list(family = "Georgia"))
        , plot_bgcolor = rgb(.8,.8,.8)
        , margin = list(b = 30, t = 50)
        ) 
  })();
Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero

Warning in cor(as.numeric(days_to_market), as.numeric(on_market_age)): the
standard deviation is zero
Warning: `line.width` does not currently support multiple values.

“How does the correlation between days_to_market and days_market_absent change by route of administration?”:

get.smart("drugs")$use(identifier, metrics, retain = c(route), subset = days_to_market >= 0) |> 
  unique() |>
  setkey(route, alt_ndc, days_market_absent, on_market_age) |>
  split_f(~route) |>
  map_dbl(\(x) x %$% { 
    suppressWarnings(cor(as.numeric(days_market_absent), as.numeric(on_market_age)))
  }) |>
  modify_if(is.na, \(x) 0) |> 
  (\(x){ 
    x <- x[order(x)]
    nm <- names(x)
    z <- calc.zero_mean(x, as.zscore = TRUE, use.population = TRUE)
    y <- ratio(x + abs(min(x)), type="pareto", decimals = 6)
    
    .wh_scale <- 800 * c(1.2, .7)
    
    plot_ly(
      x = z
      , y = y
      , size = 5 * exp(x) + 10
      , width = .wh_scale[1]
      , height = .wh_scale[2]
      , hoverinfo = "text"
      , hovertext = sprintf(fmt ="<b>%s</b><br><b>Y:</b> %.2f%% of Total<br><b>Cor</b>(days_to_market, days_market_absent): %.2f<br><b>Z-score</b>(X): %.2f", nm, y * 100, x, z)
      , color = x
      , stroke = I("black")
      , type = "scatter"
      , mode = "markers"
      ) |>
      config(mathjax = "cdn", displayModeBar = FALSE) |>
      layout(
        xaxis = list(
            title = list(
              text = "Z-score<sub>X</sub>: X | Cor(m<sub>0</sub>, m<sub>1</sub>) ~ Route"
              , font = list(size = 16, family = "Georgia"))
            , gridcolor = "#FFFFFF"
            )
        , yaxis = list(
            title = list(
              text = "Cumulative Proportion (X)"
              , font = list(size = 16, family = "Georgia"))
            , gridcolor = "#FFFFFF"
            )
        , title = list(
            text = sprintf("Correlation Coefficient (<span style='text-emphasis-position:under; text-emphais: filled red double-circle; '>%s</span> vs. <span style='text-emphasis-position:under; text-emphais: filled red double-circle; '>%s</span>) by Route of Administration", "days_to_market", "days_market_absent")
            , font = list(family = "Georgia"))
        , plot_bgcolor = rgb(.8,.8,.8)
        , margin = list(b = 30, t = 50)
        ) 
})()
Warning: `line.width` does not currently support multiple values.

Market age relative to days to market shows more variability in correlation distribution. This is not to make an claim of statistically significant differentiation; however, it may be worth exploring whether or not there are clusters of administration routes based on event correlation. A future update may address this, but this is as deep of exploration I’ll go for now.